rm(list = ls())
here::i_am(file.path("code", "05_camp_discrimination.R"))
library(here)
source(here("code", "config.R"))

library(MatchIt)

#################################################################################
# Baseline estimates

est_df <- read_parquet(here("data", "analysis", "analysis.parquet"))

res_nco <- feols(
  is_naacp ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ nco,
  cluster = ~ serial_num
)

res_training <- feols(
  is_naacp ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ training,
  cluster = ~ serial_num
)

p <- bind_rows(
  map_dfr(res_nco, broom::tidy, conf.int = TRUE, .id = "sample") |>
    mutate(
      outcome = "(a) Share of Black\nnon-commissioned officers",
      sample = factor(as.integer(substr(sample, nchar(sample), nchar(sample))),
                      levels = c(0, 1, 2),
                      labels = c("0 to 25%", "25 to 50%", "> 50%")
      )
    ),
  map_dfr(res_training, broom::tidy, conf.int = TRUE, .id = "sample") |>
    mutate(
      outcome = "(b) Military training\nfor Black soldiers",
      sample = factor(as.integer(substr(sample, nchar(sample), nchar(sample))),
                      levels = c(0, 1, 2),
                      labels = c("No troops", "Some", "All")
      )
    )
) |>
  ggplot(aes(x = sample, ymin = conf.low, ymax = conf.high, y = estimate)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .7) +
  my_theme() +
  labs(y = "Effect on NAACP\nmembership (TSLS)", x = "") +
  facet_wrap(~ outcome, scales = "free_x")

p_color <- p +
  geom_point(size = 2, color = "#F8766D") +
  geom_linerange(color = "#F8766D")
ggsave(file.path(fig_dir, "color", "camps.pdf"), plot = p_color, width = 6, height = 3)
p_bw <- p +
  geom_point(size = 2) +
  geom_linerange()

ggsave(file.path(fig_dir, "bw", "camps.pdf"), plot = p_bw, width = 6, height = 3)

#################################################################################
# Matched samples

est_df <- est_df |>
  mutate(
    birthyr_cards_quintile = cut(birthyr_cards, 5),
    nco0 = nco == 0,
    training0 = training == 0,
  )

est_df2 <- est_df |>
  filter(!is.na(qdiff_bw))

nco_match_obj2 <-
  matchit(
    nco0 ~ birthyr_cards_quintile + farmer + laborer + married_cards + exemption + qdiff_bw,
    data = est_df2 |> filter(!is.na(nco)),
    method = "exact",
    estimand = "ATT"
  )

matched_nco2 <- match.data(nco_match_obj2)

res_nco_matched2 <- feols(
  is_naacp ~ 1 |
    birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer |
    vet_combined ~ order_num_serial_norm,
  data = matched_nco2,
  split = ~ nco,
  weights = ~ weights,
  cluster = ~ serial_num
)

training_match_obj2 <-
  matchit(
    training0 ~ birthyr_cards_quintile + farmer + laborer + married_cards + exemption + qdiff_bw,
    data = est_df2 |> filter(!is.na(training)),
    method = "exact",
    estimand = "ATT"
  )

matched_training2 <- match.data(training_match_obj2)

res_training_matched2 <- feols(
  is_naacp ~ 1 |
    birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer |
    vet_combined ~ order_num_serial_norm,
  data = matched_training2,
  split = ~ training,
  weights = ~ weights,
  cluster = ~ serial_num
)

p_color <- bind_rows(
  bind_rows(
    map_dfr(res_nco, broom::tidy, conf.int = T, .id = "sample") |>
      mutate(
        matched = "No"
      ),
    map_dfr(res_nco_matched2, broom::tidy, conf.int = T, .id = "sample") |>
      mutate(
        matched = "Yes"
      )
  ) |>
    mutate(
      outcome = "(a) Share of Black\nnon-commissioned officers",
      sample = factor(
        as.integer(substr(sample, nchar(sample), nchar(sample))),
        levels = c(0, 1, 2),
        labels = c("0 to 25%", "25 to 50%", "> 50%")
      )
    ),
  bind_rows(
    map_dfr(
      res_training,
      broom::tidy,
      conf.int = T,
      .id = "sample"
    ) |>
      mutate(
        matched = "No"
      ),
    map_dfr(
      res_training_matched2,
      broom::tidy,
      conf.int = T,
      .id = "sample"
    ) |>
      mutate(
        matched = "Yes"
      )
  ) |>
    mutate(
      outcome = "(b) Military training\nfor Black soldiers",
      sample = factor(
        as.integer(substr(sample, nchar(sample), nchar(sample))),
        levels = c(0, 1, 2),
        labels = c("No troops", "Some", "All")
      )
    )
) |>
  ggplot(aes(
    x = sample,
    ymin = conf.low,
    ymax = conf.high,
    y = estimate,
    color = matched,
    shape = matched
  )) +
  geom_hline(yintercept = 0,
             linetype = "dashed",
             alpha = .7) +
  geom_point(size = 2, position = position_dodge2(.3)) +
  geom_linerange(position = position_dodge2(.3)) +
  my_theme() +
  labs(y = "Effect on NAACP\nmembership (TSLS)", x = "", color = "Matched sample", shape = "Matched sample") +
  facet_wrap( ~ outcome, scales = "free_x") +
  ylim(NA, .2)

ggsave(file.path(fig_dir, "color", "camps_matched.pdf"),
       plot = p_color,
       width = 6,
       height = 3.5)

p_bw <- p_color + 
  scale_color_manual(values = c("No" = "black", "Yes" = "#555"))

ggsave(file.path(fig_dir, "bw", "camps_matched.pdf"),
       plot = p_bw,
       width = 6,
       height = 3.5)

#################################################################################
# Camp fractionalization

est_df <- est_df |>
  group_by(camp, state_cards) |>
  mutate(
    n_camp_state = n()
  ) |>
  group_by(camp, region_cards) |>
  mutate(
    n_camp_region = n()
  ) |>
  group_by(camp, division_cards) |>
  mutate(
    n_camp_division = n()
  ) |>
  group_by(camp) |>
  mutate(
    share_camp_state_xself = (n_camp_state - 1) / (n() - 1),
    share_camp_state_grp = case_when(
      share_camp_state_xself < .25 ~ 1,
      share_camp_state_xself < .5 ~ 2,
      share_camp_state_xself < .75 ~ 3,
      TRUE ~ 4
    ),
    share_camp_region_xself = (n_camp_region - 1) / (n() - 1),
    share_camp_region_grp = case_when(
      share_camp_region_xself < .25 ~ 1,
      share_camp_region_xself < .5 ~ 2,
      share_camp_region_xself < .75 ~ 3,
      TRUE ~ 4
    ),
    share_camp_division_xself = (n_camp_division - 1) / (n() - 1),
    share_camp_division_grp = case_when(
      share_camp_division_xself < .25 ~ 1,
      share_camp_division_xself < .5 ~ 2,
      share_camp_division_xself < .75 ~ 3,
      TRUE ~ 4
    )
  )

res1 <- feols(
  is_naacp ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ share_camp_state_grp,
  cluster = ~ serial_num
)

res2 <- feols(
  is_naacp ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ share_camp_region_grp,
  cluster = ~ serial_num
)

res3 <- feols(
  is_naacp ~ 1 | birthyr_cards + bpl_cards + board_identifier + exemption^married_cards +
    farmer + laborer + farm_laborer | vet_combined ~ order_num_serial_norm,
  data = est_df,
  split = ~ share_camp_division_grp,
  cluster = ~ serial_num
)

p_color <- bind_rows(
  coeftable(res1),
  coeftable(res2),
  coeftable(res3)
) |>
  mutate(
    sample = factor(
      sample,
      levels = c(1, 2, 3, 4),
      labels = c("0-25%", "25-50%", "50-75%", "75-100%")
    ),
    sample.var = factor(
      sample.var,
      levels = c("share_camp_state_grp", "share_camp_division_grp", "share_camp_region_grp"),
      labels = c("State", "Division", "Region")
    )
  ) |>
  ggplot(aes(x = as.factor(sample), ymin = Estimate - 1.96*`Std. Error`, ymax = Estimate + 1.96*`Std. Error`, y = Estimate, color = sample.var, shape = sample.var)) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = .7) +
  geom_point(size = 2, position = position_dodge2(.5)) +
  geom_linerange(position = position_dodge2(.5)) +
  my_theme() +
  labs(x ="Share at camp from same area", y = "Effect on NAACP\nmembership (TSLS)", color = "", shape = "")
ggsave(file.path(fig_dir, "color", "iv_camp_fractionalization.pdf"), plot = p_color, width = 5, height = 3)

p_bw <- p_color +
  scale_color_manual(values = c("State" = "black", "Division" = "#555", "Region" = "#999"))
ggsave(file.path(fig_dir, "bw", "iv_camp_fractionalization.pdf"), plot = p_bw, width = 5, height = 3)
